################################################################################
### Replication Code for Appendix H                                          ###
### Title: Do External Threats Increase Bipartisanship in the United States? ###
###        An Experimental Test in the Shadow of China's Rise                ###
### Authors: Eddy S. F. Yeung, Weifang Xu                                    ###
### Version: September 24, 2024                                              ###
################################################################################

### Set-up ----
## Clean the working environment and set up the working directory
rm(list = ls())
# setwd()  # set your working directory here, which should also contain the survey data ("survey_data.csv")

## Load the required packages
library(tidyverse)  # version 2.0.0
library(stm)        # version 1.3.6
library(tm)         # version 0.7-8

## Import the dataset
df <- read.csv("survey_data.csv")

## Create indicators of treatment status
table(df$exp_3)
df <- df %>% mutate(exp_condition = case_when(
  exp_3 == 1 ~ "No Threat Prime",
  exp_3 == 2 ~ "Biden Threat Prime",
  exp_3 == 3 ~ "Trump Threat Prime",
  exp_3 == 4 ~ "Nonpartisan Threat Prime"
))
df$exp_condition <- factor(df$exp_condition,
                           levels = c("No Threat Prime", "Biden Threat Prime", 
                                      "Trump Threat Prime", "Nonpartisan Threat Prime"))
table(df$exp_condition)

## Create a 7-point variable of partisanship (-3 = strong Democrat; 3 = strong Republican)
df <- df %>% mutate(pid7 = case_when(
  pid_1 == 2 & pid_2d == 1 ~ -3,
  pid_1 == 2 & pid_2d == 2 ~ -2,
  (pid_1 == 3 | pid_1 == 4) & pid_2i == 2 ~ -1,
  (pid_1 == 3 | pid_1 == 4) & pid_2i == 4 ~ 0,
  (pid_1 == 3 | pid_1 == 4) & pid_2i == 1 ~ 1,
  pid_1 == 1 & pid_2r == 2 ~ 2,
  pid_1 == 1 & pid_2r == 1 ~ 3
))
table(df$pid7)

## Create a variable that contains all open-ended responses
df <- df %>% mutate(text = case_when(
  exp_condition == "Biden Threat Prime" ~ threat_reinforce_B,
  exp_condition == "Trump Threat Prime" ~ threat_reinforce_T,
  exp_condition == "Nonpartisan Threat Prime" ~ threat_reinforce_N
))

### Process the text data ----
## Convert curly apostrophes to straight apostrophes
df$text <- gsub("[\u2018\u2019\u201A\u201B\u2032\u2035]", "'", df$text)

## Standard processing using stm
processed <- textProcessor(df$text, metadata = df)
out <- prepDocuments(processed$documents, processed$vocab, processed$meta)
docs <- out$documents
vocab <- out$vocab
meta <- out$meta

### Run the STM ----
## Assuming K = 3 (i.e., estimating a 3-topic STM)
selectedmodel <- 
  stm(out$documents, out$vocab, K = 3,
      prevalence =~ exp_condition * pid7,
      data = out$meta, 
      init.type = "Spectral", seed = 1234567)

### Table S4: Top Words and Theme for Each Topic ----
sink("stm_top_words_themes.txt")
labelTopics(selectedmodel)
sink() # Table S4 is "stm_top_words_themes.tex", which is edited from "stm_top_words_themes.txt" exported here

### Table S5: Representative Responses for Each Topic ----
## Top 25 representative responses of Topic 1
findThoughts(selectedmodel, texts = meta$text, n = 25, topics = 1)$docs[[1]]

## Top 25 representative responses of Topic 2
findThoughts(selectedmodel, texts = meta$text, n = 25, topics = 2)$docs[[1]]

## Top 25 representative responses of Topic 3
findThoughts(selectedmodel, texts = meta$text, n = 25, topics = 3)$docs[[1]]

### Figure S18: Expected Topic Proportions across Experimental Groups and Their Relationship with Party Identification ----
## Create an empty PDF file to store the graph
pdf("fig_stm.pdf", height = 10/3, width = 10)
par(mfrow = c(1, 3))

## Plot the heterogeneous treatment effects for Topic 1
set.seed(1234567)
prep <- estimateEffect(c(1) ~ exp_condition * pid7, 
                       selectedmodel,
                       metadata = out$meta,
                       uncertainty = "Global")
par(family = "Times", ps = 14)
plot.estimateEffect(prep,
                    covariate = "pid7",
                    model = selectedmodel,
                    method = "continuous",
                    xlab = "Party Identification",
                    moderator = "exp_condition",
                    moderator.value = "Biden Threat Prime",
                    linecol = "black",
                    ylim = c(0, 1),
                    printlegend = F,
                    main = "Topic 1\n(Commenting on the Report/Trump)")
plot.estimateEffect(prep,
                    covariate = "pid7",
                    model = selectedmodel,
                    method = "continuous",
                    moderator = "exp_condition",
                    moderator.value = "Trump Threat Prime",
                    linecol = "blue",
                    ylim = c(0, 1),
                    add = T,
                    printlegend = F)
plot.estimateEffect(prep,
                    covariate = "pid7",
                    model = selectedmodel,
                    method = "continuous",
                    moderator = "exp_condition",
                    moderator.value = "Nonpartisan Threat Prime",
                    linecol = "red",
                    ylim = c(0, 1),
                    add = T,
                    printlegend = F)
legend("topright", c("Biden Threat Prime", "Trump Threat Prime", "Nonpartisan Threat Prime"), 
       lwd = 2, col = c("black", "blue", "red"))

## Plot the heterogeneous treatment effects for Topic 2
set.seed(1234567)
prep <- estimateEffect(c(2) ~ exp_condition * pid7, 
                       selectedmodel,
                       metadata = out$meta,
                       uncertainty = "Global")
par(family = "Times", ps = 14)
plot.estimateEffect(prep,
                    covariate = "pid7",
                    model = selectedmodel,
                    method = "continuous",
                    xlab = "Party Identification",
                    moderator = "exp_condition",
                    moderator.value = "Biden Threat Prime",
                    linecol = "black",
                    ylim = c(0, 1),
                    printlegend = F,
                    main = "Topic 2\n(Expressing Uncertainty)")
plot.estimateEffect(prep,
                    covariate = "pid7",
                    model = selectedmodel,
                    method = "continuous",
                    moderator = "exp_condition",
                    moderator.value = "Trump Threat Prime",
                    linecol = "blue",
                    ylim = c(0, 1),
                    add = T,
                    printlegend = F)
plot.estimateEffect(prep,
                    covariate = "pid7",
                    model = selectedmodel,
                    method = "continuous",
                    moderator = "exp_condition",
                    moderator.value = "Nonpartisan Threat Prime",
                    linecol = "red",
                    ylim = c(0, 1),
                    add = T,
                    printlegend = F)
legend("topright", c("Biden Threat Prime", "Trump Threat Prime", "Nonpartisan Threat Prime"), 
       lwd = 2, col = c("black", "blue", "red"))

## Plot the heterogeneous treatment effects for Topic 3
set.seed(1234567)
prep <- estimateEffect(c(3) ~ exp_condition * pid7, 
                       selectedmodel,
                       metadata = out$meta,
                       uncertainty = "Global")
par(family = "Times", ps = 14)
plot.estimateEffect(prep,
                    covariate = "pid7",
                    model = selectedmodel,
                    method = "continuous",
                    xlab = "Party Identification",
                    moderator = "exp_condition",
                    moderator.value = "Biden Threat Prime",
                    linecol = "black",
                    ylim = c(0, 1),
                    printlegend = F,
                    main = "Topic 3\n(Discussing China Threat)")
plot.estimateEffect(prep,
                    covariate = "pid7",
                    model = selectedmodel,
                    method = "continuous",
                    moderator = "exp_condition",
                    moderator.value = "Trump Threat Prime",
                    linecol = "blue",
                    ylim = c(0, 1),
                    add = T,
                    printlegend = F)
plot.estimateEffect(prep,
                    covariate = "pid7",
                    model = selectedmodel,
                    method = "continuous",
                    moderator = "exp_condition",
                    moderator.value = "Nonpartisan Threat Prime",
                    linecol = "red",
                    ylim = c(0, 1),
                    add = T,
                    printlegend = F)
legend("topright", c("Biden Threat Prime", "Trump Threat Prime", "Nonpartisan Threat Prime"), 
       lwd = 2, col = c("black", "blue", "red"))
dev.off()
